# create a files directory if it does not exist
if (!dir.exists('step2_spatial_model_files')) {dir.create('step2_spatial_model_files')}
# Check and install if these packages are not found locally on machine
if(require(pacman)==FALSE) install.packages("pacman"); library(pacman)
if(require(devtools)==FALSE) install.packages("devtools")
if(require(urbnmapr)==FALSE) devtools::install_github('UrbanInstitute/urbnmapr'); library(urbnmapr)
if(require(albersusa)==FALSE) devtools::install_github("hrbrmstr/albersusa"); library(albersusa)
# INLA
if(require(INLA) == FALSE){
install.packages(
"INLA", repos=c(getOption("repos"), INLA="https://inla.r-inla-download.org/R/stable"),
dep=TRUE)
}; library(INLA)
# ggregplot and INLAutils for visualizing and manipulating INLA output
if('ggregplot' %in% rownames(installed.packages()) == FALSE) devtools::install_github('gfalbery/ggregplot')
if(require(INLAutils) == FALSE) devtools::install_github('timcdlucas/INLAutils'); library(INLAutils)
pacman::p_load(tidyverse, magrittr, janitor, lubridate, hms, skimr, forecast, # data analysis
spdep, # to get the neighbors
MCMCglmm, # required for ggregplot
fontawesome, rsvg, # for fontawesome icons
pander, knitr, DT, sparkline, # for nicely printed outputs
scales, plotly, glue, # for plots
sf, leaflet, tigris, tmap, # for maps
gifski, av) # for creating gif and videosCOVID-19 Vaccines by County
Step 2: Spatial Modeling of the Logistic Growth Curve Paramaters with INLA
1 Objectives of this Document
This is the third Markdown in our analysis workflow (see step0_extract_and_eda.html, step0_transform.html, and step1_logistic_growth.html for our previous analyses). The main objective of this document is to:
- Fit a spatial model to explain the differences in the \(\phi_1, \phi_2\), and \(\phi_3\) parameters obtained from the 2,386 logistic growth curve models for the counties with vaccination data starting from February 01, 2022. Note that we limit the fits to the 2,386 counties with complete data, which are stored in the p_matrix_final.rds (generated by step0_transform.html in our previous analyses).
2 R Setup and Required Packages
In this project, the open-source programming language is used to model the COVID-19 percent fully-vaccinated progression in different U.S. counties. is maintained by an international team of developers who make the language available at The Comprehensive R Archive Network. Readers interested in reusing our code and reproducing our results should have installed locally on their machines. can be installed on a number of different operating systems (see Windows, Mac, and Linux for the installation instructions for these systems). We also recommend using the RStudio interface for . The reader can download RStudio for free by following the instructions at the link. For non-R users, we recommend the Hands-on Programming with R for a brief overview of the software’s functionality. Hereafter, we assume that the reader has an introductory understanding of the programming language.
In the code chunk below, we load the packages used to support our analysis. Our input and output files can also be accessed/ downloaded from fmegahed/vaccines_spatial_and_optimization.
3 A Spatial Model for Explaining the Variability in Growth Curve Paramaters
3.1 Preparing the Data for INLA
In the code chunk below, we merge the vaccines and the growth_curve_models.rds data frames together and save them into an object titled inla_tbl. The merge operation is done in such a way that the number of rows is equal to the number of rows in the vaccines dataset, i.e., all 3,108 contiguous U.S. counties. This will introduce NAs for \(\phi_1, \, \phi_2, \, \text{ and } \phi_3\) parameters for the 722 counties which we did not fit a logistic curve model. Furthermore, we engineer five new features:
- z, which is computed by dividing the phi1 estimate by 100, i.e., we convert the asymptote for the percent vaccinated by county to a fraction.
- z1, where we compute the logit of z. The rationale behind taking the logit of z is that we want to restrict the values of our asymptote to (0, 1) since the fraction of population vaccinated cannot exceed 1, and we do not expect it to be equal to exactly 0 or 1.
- z2 is equal to phi2. We introduce it only to make our notation consistent.
- z3, which is equal to the log() of phi3 since we want to restrict our phi3 (slope parameter) to positive values since we expect that the percent of vaccinated individuals in a given county to increase over the course of our study period.
We reuse the urbnmapr package to obtain the sf geometries for each of the 3,108 counties. We then capitalize on the approach of Paula Moraga, Chapter 5 to convert our sf geometries to a list of neighbours and a graph that can be read and processed by INLA.
# loading the phi parameters from step1_logistic_growth_curves
growth_curve_models = read_rds('data/growth_curve_models.rds')
# loading the vaccines data which contains our three predictor variables
vaccines =
read_rds('data/vaccines.rds') %>%
# selecting name-variables, population and the three predictors
select( fips, recip_county, recip_state, svi_ctgy:perc_rep_votes) %>%
# removing duplicates since we no longer store the vaccination over time
# our predictors do not change over the course of our dataset
unique() %>%
mutate(county_name = paste(recip_county, recip_state, sep = ', ') ) %>%
select(-c( recip_county, recip_state)) %>%
relocate(county_name, .after = 'fips')
# we used right join since we wanted to have NAs for counties where we did not fit
# the logistic growth curve model
inla_tbl = right_join(
growth_curve_models %>% select(-c(bic,mae,rmse,mse)),
vaccines %>% select(-census2019),
by = 'fips') %>%
ungroup() %>%
mutate( z = phi1/100,
z1 = log(z/(1-z)),
z2 = phi2,
z3 = log(phi3),
idarea = row_number()
)
# getting the shape files
counties_sf = get_urbn_map(map = "counties", sf = TRUE) %>%
filter(!state_name %in% c('Alaska', 'Hawaii') )
# getting the neighbors list into inla
nb = poly2nb(counties_sf$geometry)
# based on https://www.paulamoraga.com/book-geospatial/sec-arealdatatheory.html
nb2INLA("data/map.adj", nb)
g = inla.read.graph(filename = "data/map.adj")3.2 Model 1
We capitalize on the popular Besag-York-Mollié (BYM) model and the INLA package to fit the logit model for phi1 (\(z_1 = \text{logit}(\phi_1)\)). We save the results from the model in an object res1, which can be downloaded from fmegahed/vaccines_spatial_and_optimization/data/. Furthermore, we print summaries of the fixed effects, and the model hyperparameters below.
# INLA formula with default priors
# ui_s spatially correlated error terms and vi_s are the temporal correlated error terms
formula1 <- z1 ~ f(idarea, model = "bym", graph = g) + perc_rep_votes + svi_ctgy + metro_status
res1 <- inla(formula1,
family = "gaussian", data = inla_tbl, control.predictor = list(compute = TRUE)
)
# saving the results from model1 into an rds file
write_rds(res1, file = 'data/inla_model1.rds')
# printing select results
cat('The fixed effects for the model are: \n')
round(res1$summary.fixed[, c(1:5,7)], digits = 3) %>% knitr::kable()
cat('A summary of the model hyperparameters: \n')
round(res1$summary.hyperpar[, 1:5], digits = 2) %>% knitr::kable()
cat('The fitted z1 = logit(phi1) values are: \n')
res1$summary.fitted.values %>%
mutate(county_names = inla_tbl$county_name) %>%
relocate(county_names, .before = mean) %>%
DT::datatable(options = list(pageLength = 10, autoWidth = TRUE), rownames = F) %>%
formatRound(2:6, digits = 2) The fixed effects for the model are:
| mean | sd | 0.025quant | 0.5quant | 0.975quant | kld | |
|---|---|---|---|---|---|---|
| (Intercept) | 1.469 | 0.033 | 1.404 | 1.469 | 1.534 | 0 |
| perc_rep_votes | -0.020 | 0.001 | -0.021 | -0.020 | -0.019 | 0 |
| svi_ctgyB | -0.070 | 0.020 | -0.110 | -0.070 | -0.030 | 0 |
| svi_ctgyC | -0.118 | 0.020 | -0.158 | -0.118 | -0.079 | 0 |
| svi_ctgyD | -0.229 | 0.021 | -0.271 | -0.229 | -0.188 | 0 |
| metro_statusNon-metro | -0.059 | 0.016 | -0.091 | -0.059 | -0.028 | 0 |
A summary of the model hyperparameters:
| mean | sd | 0.025quant | 0.5quant | 0.975quant | |
|---|---|---|---|---|---|
| Precision for the Gaussian observations | 15.18 | 0.89 | 12.51 | 14.34 | 18.12 |
| Precision for idarea (iid component) | 17.20 | 1.19 | 13.90 | 18.30 | 21.48 |
| Precision for idarea (spatial component) | 2358.44 | 1993.48 | 301.81 | 1810.99 | 7629.28 |
The fitted z1 = logit(phi1) values are:
3.3 Model 2
Similar to Model1, we capitalize on the popular Besag-York-Mollié (BYM) model and the INLA package to fit a linear model for phi2 (\(z_2 = \phi_2\)). We save the results from the model in an object res2, which can be downloaded from fmegahed/vaccines_spatial_and_optimization/data/. Furthermore, we print summaries of the fixed effects, and the model hyperparameters below.
# INLA formula with default priors
# ui_s spatially correlated error terms and vi_s are the temporal correlated error terms
formula2 <- z2 ~ f(idarea, model = "bym", graph = g) + perc_rep_votes + svi_ctgy + metro_status
res2 <- inla(formula2,
family = "gaussian", data = inla_tbl, control.predictor = list(compute = TRUE)
)
write_rds(res2, file = 'data/inla_model2.rds')
# printing select results
cat('The fixed effects for the model are: \n')
round(res2$summary.fixed[, c(1:5,7)], digits = 3) %>% knitr::kable()
cat('A summary of the model hyperparameters: \n')
round(res2$summary.hyperpar[, 1:5], digits = 2) %>% knitr::kable()
cat('The fitted z2 = phi2 values are: \n')
res2$summary.fitted.values %>%
mutate(county_names = inla_tbl$county_name) %>%
relocate(county_names, .before = mean) %>%
DT::datatable(options = list(pageLength = 10, autoWidth = TRUE), rownames = F) %>%
formatRound(2:6, digits = 2) The fixed effects for the model are:
| mean | sd | 0.025quant | 0.5quant | 0.975quant | kld | |
|---|---|---|---|---|---|---|
| (Intercept) | -5.594 | 0.292 | -6.167 | -5.594 | -5.021 | 0 |
| perc_rep_votes | 0.024 | 0.004 | 0.015 | 0.024 | 0.032 | 0 |
| svi_ctgyB | 0.220 | 0.178 | -0.130 | 0.220 | 0.569 | 0 |
| svi_ctgyC | 0.655 | 0.180 | 0.301 | 0.655 | 1.008 | 0 |
| svi_ctgyD | 0.961 | 0.187 | 0.594 | 0.961 | 1.327 | 0 |
| metro_statusNon-metro | 0.051 | 0.142 | -0.228 | 0.051 | 0.330 | 0 |
A summary of the model hyperparameters:
| mean | sd | 0.025quant | 0.5quant | 0.975quant | |
|---|---|---|---|---|---|
| Precision for the Gaussian observations | 0.10 | 0.00 | 0.10 | 0.10 | 0.11 |
| Precision for idarea (iid component) | 19.80 | 17.14 | 3.21 | 15.00 | 65.30 |
| Precision for idarea (spatial component) | 2082.73 | 2270.45 | 31.95 | 1258.32 | 8078.06 |
The fitted z2 = phi2 values are:
3.4 Model 3
Similar to Models 1 and 2, we capitalize on the popular Besag-York-Mollié (BYM) model and the INLA package to fit model for the log of phi3 (\(z_3 = \log(\phi_3)\)). We save the results from the model in an object res3, which can be downloaded from fmegahed/vaccines_spatial_and_optimization/data/. Furthermore, we print summaries of the fixed effects, and the model hyperparameters below.
# INLA formula with default priors
# ui_s spatially correlated error terms and vi_s are the temporal correlated error terms
formula3 <- z3 ~ f(idarea, model = "bym", graph = g) + perc_rep_votes + svi_ctgy + metro_status
res3 <- inla(formula3,
family = "gaussian", data = inla_tbl, control.predictor = list(compute = TRUE)
)
write_rds(res3, file = 'data/inla_model3.rds')
# printing select results
cat('The fixed effects for the model are: \n')The fixed effects for the model are:
| mean | sd | 0.025quant | 0.5quant | 0.975quant | kld | |
|---|---|---|---|---|---|---|
| (Intercept) | -0.008 | 0.029 | -0.064 | -0.008 | 0.048 | 0 |
| perc_rep_votes | -0.006 | 0.000 | -0.007 | -0.006 | -0.005 | 0 |
| svi_ctgyB | -0.122 | 0.017 | -0.156 | -0.122 | -0.088 | 0 |
| svi_ctgyC | -0.229 | 0.018 | -0.264 | -0.229 | -0.195 | 0 |
| svi_ctgyD | -0.412 | 0.018 | -0.448 | -0.412 | -0.376 | 0 |
| metro_statusNon-metro | 0.103 | 0.014 | 0.076 | 0.103 | 0.131 | 0 |
cat('A summary of the model hyperparameters: \n')A summary of the model hyperparameters:
| mean | sd | 0.025quant | 0.5quant | 0.975quant | |
|---|---|---|---|---|---|
| Precision for the Gaussian observations | 22.97 | 1.54 | 18.66 | 22.67 | 28.07 |
| Precision for idarea (iid component) | 20.87 | 1.34 | 17.17 | 21.05 | 25.34 |
| Precision for idarea (spatial component) | 754.32 | 739.24 | 154.53 | 536.43 | 2705.57 |
cat('The fitted z3 = log(phi3) values are: \n')The fitted z3 = log(phi3) values are:
res3$summary.fitted.values %>%
mutate(county_names = inla_tbl$county_name) %>%
relocate(county_names, .before = mean) %>%
DT::datatable(options = list(pageLength = 10, autoWidth = TRUE), rownames = F) %>%
formatRound(2:6, digits = 2) 4 Visualizing the INLA Model Outputs
4.1 Effect Sizes for all three Models
Given that we used the same covariates across all three models, we visualize the effect sizes across the models below. Our code capitalizes on the ggregplot developmental package, and specifically the Efxplot(), which allows us to create a ggplot2 object containing the Effect Sizes and their 95% credible intervals.
# capitalizing on the Efxplot from the ggregplot package
ggregplot::Efxplot(list(res1, res2, res3), PointSize = 2, BarWidth = 0.5,
tips = 0.25) +
scale_color_brewer(palette = 'Dark2') +
labs(title = 'Effect Sizes and their Credible Intervals') +
theme(legend.position = 'bottom')
4.2 Exploring Each Model in Detail
To provide additional insight, we present the following plots for each model:
- An
autoplot()for each model results. The autoplot is possible due to the INLAutils package.
Per P.3 of their arxiv paper, the
plot provides a visual overview of (top-left): the marginal posterior distributions, prior distributions and credible intervals of the intercept … and coefficients for the covariates …; (top-right): the marginal posterior distributions and credible intervals of model hyperparameters; the marginal posterior mean and 95% credible intervals of any random effects if specified in the model and; (bottom-left) and the linear predictor and fitted values (bottom-right).
To facilitate the visualization of this data, we also present a separate plot of each of those visuals in a dedicated subsection.
- A spatial plot of the posterior means for each of the \(z_1, z_2, \text{ and } z_3\) parameters is also presented. We only show the interactive leaflet version of these plot in this document; however, we also generate and save a static version (with the chunk.option command
include = F). The static versions can be seen at fmegahed/vaccines_spatial_optimization.
4.2.1 Model 1
p = autoplot(res1)
4.2.1.1 Posterior Distribution of the Intercept and Covariates
p[[1]] +
geom_line(aes(color = var), size = 1.3) +
scale_color_brewer(palette = 'Dark2')
4.2.1.2 Precision
p[[2]] +
geom_line(aes(color = var), size = 1.3) +
scale_color_brewer(palette = 'Dark2')
4.2.1.3 Marginal Posterior Mean and 95% CI of the Spatial Random Effects
p[[3]] +
ylim(-1,1) +
geom_line(aes(color = var), size = 1.3) +
scale_color_brewer(palette = 'Dark2') +
scale_x_continuous(breaks = pretty_breaks(n=10))
4.2.1.4 Marginal posterior mean and 95% credible intervals of the linear predictor, and fitted values
p[[4]] +
ylim(-1,1) +
scale_x_continuous(breaks = pretty_breaks(n=10))
4.2.1.5 Spatial Plot of the Posterior Means
# creating the interactive plot
leaflet_sf =
# getting the shape file for the continental US with loglat projection
counties_sf(proj = 'longlat') %>% filter(!state %in% c('Alaska', 'Hawaii')) %>%
# adding the covariates
right_join(vaccines %>% dplyr::select(-county_name), by = 'fips')
# adding the results from model1
leaflet_sf$model1mean = res1$summary.fitted.values[, "mean"]
# selecting the color palette
leaflet_pal = colorNumeric('YlOrRd', domain = leaflet_sf$model1mean,
na.color = "white")
leaflet() %>% # initializing the leaflet map
setView(lng = -96, lat = 37.8, zoom = 4) %>% # setting the view on Continental US
addTiles() %>% # adding the default tiles
addPolygons(
data = leaflet_sf, stroke = FALSE, fillColor = ~leaflet_pal(leaflet_sf$model1mean), # adding the data
weight = 2, opacity = 1, color = "white", dashArray = "3", fillOpacity = 0.7, # adding color specs
popup = paste(
"County:",leaflet_sf$name, '<br>',
"SVI Cat:",leaflet_sf$svi_ctgy, '<br>',
"Metro Status:",leaflet_sf$metro_status, '<br>',
'% Rep Votes:', round(leaflet_sf$perc_rep_votes, 1), '<br>',
"z1 Posterior Mean:", scales::comma(round(leaflet_sf$model1mean, 1)), '<br>')
) %>% #pop-up Menu
addLegend(position = "bottomleft", pal = leaflet_pal, values = leaflet_sf$model1mean,
title = "z1 Posterior Mean", opacity = 1) # legend formatting4.2.2 Model 2
p = autoplot(res2)
4.2.2.1 Posterior Distribution of the Intercept and Covariates
p[[1]] +
geom_line(aes(color = var), size = 1.3) +
scale_color_brewer(palette = 'Dark2')
4.2.2.2 Precision
p[[2]] +
geom_line(aes(color = var), size = 1.3) +
scale_color_brewer(palette = 'Dark2')
4.2.2.3 Marginal Posterior Mean and 95% CI of the Spatial Random Effects
p[[3]] +
ylim(-1,1) +
geom_line(aes(color = var), size = 1.3) +
scale_color_brewer(palette = 'Dark2') +
scale_x_continuous(breaks = pretty_breaks(n=10))
4.2.2.4 Marginal posterior mean and 95% credible intervals of the linear predictor, and fitted values
p[[4]] +
ylim(-5,5) +
scale_x_continuous(breaks = pretty_breaks(n=10))
4.2.2.5 Spatial Plot of the Posterior Means
# adding the results from model2
leaflet_sf$model2mean = res2$summary.fitted.values[, "mean"]
# selecting the color palette
leaflet_pal = colorNumeric('YlOrRd', domain = leaflet_sf$model2mean,
na.color = "white")
leaflet() %>% # initializing the leaflet map
setView(lng = -96, lat = 37.8, zoom = 4) %>% # setting the view on Continental US
addTiles() %>% # adding the default tiles
addPolygons(
data = leaflet_sf, stroke = FALSE, fillColor = ~leaflet_pal(leaflet_sf$model2mean), # adding the data
weight = 2, opacity = 1, color = "gray", dashArray = "3", fillOpacity = 0.7, # adding color specs
popup = paste(
"County:",leaflet_sf$name, '<br>',
"SVI Cat:",leaflet_sf$svi_ctgy, '<br>',
"Metro Status:",leaflet_sf$metro_status, '<br>',
'% Rep Votes:', round(leaflet_sf$perc_rep_votes, 1), '<br>',
"z2 Posterior Mean:", scales::comma(round(leaflet_sf$model2mean, 1)), '<br>')
) %>% #pop-up Menu
addLegend(position = "bottomleft", pal = leaflet_pal, values = leaflet_sf$model2mean,
title = "z2 Posterior Mean", opacity = 1) # legend formatting4.2.3 Model 3
p = autoplot(res3)
4.2.3.1 Posterior Distribution of the Intercept and Covariates
p = autoplot(res3)
p[[1]] +
geom_line(aes(color = var), size = 1.3) +
scale_color_brewer(palette = 'Dark2')
4.2.3.2 Precision
p[[2]] +
geom_line(aes(color = var), size = 1.3) +
scale_color_brewer(palette = 'Dark2')
4.2.3.3 Marginal Posterior Mean and 95% CI of the Spatial Random Effects
p[[3]] +
ylim(-1,1) +
geom_line(aes(color = var), size = 1.3) +
scale_color_brewer(palette = 'Dark2') +
scale_x_continuous(breaks = pretty_breaks(n=10))
4.2.3.4 Marginal posterior mean and 95% credible intervals of the linear predictor, and fitted values
p[[4]] +
ylim(-1,1) +
scale_x_continuous(breaks = pretty_breaks(n=10))
4.2.3.5 Spatial Plot of the Posterior Means
# adding the results from model3
leaflet_sf$model3mean = res3$summary.fitted.values[, "mean"]
# selecting the color palette
leaflet_pal = colorNumeric('YlOrRd', domain = leaflet_sf$model3mean,
na.color = "white")
leaflet() %>% # initializing the leaflet map
setView(lng = -96, lat = 37.8, zoom = 4) %>% # setting the view on Continental US
addTiles() %>% # adding the default tiles
addPolygons(
data = leaflet_sf, stroke = FALSE, fillColor = ~leaflet_pal(leaflet_sf$model3mean), # adding the data
weight = 2, opacity = 1, color = "gray", dashArray = "3", fillOpacity = 0.7, # adding color specs
popup = paste(
"County:",leaflet_sf$name, '<br>',
"SVI Cat:",leaflet_sf$svi_ctgy, '<br>',
"Metro Status:",leaflet_sf$metro_status, '<br>',
'% Rep Votes:', round(leaflet_sf$perc_rep_votes, 1), '<br>',
"z3 Posterior Mean:", scales::comma(round(leaflet_sf$model3mean, 1)), '<br>')
) %>% #pop-up Menu
addLegend(position = "bottomleft", pal = leaflet_pal, values = leaflet_sf$model3mean,
title = "z3 Posterior Mean", opacity = 1) # legend formatting4.3 The Corresponding Logistic Growth Curves
Will add these at some point in time this week.
5 Evaluating the Models’ Predictive Performance
5.1 The \(\mathbf{\Phi}\) Matrix
# a vector of estimated phi1 parameters
z1_post_mean = res1$summary.fitted.values[, 'mean']
phi1_hat_vec = exp(z1_post_mean)/( 1 + exp(z1_post_mean) )
phi1_hat_vec = phi1_hat_vec*100
# a vector of estimated phi2 parameters
phi2_hat_vec = res2$summary.fitted.values[, 'mean']
# a vector of estimated phi3 parameters
z3_post_mean = res3$summary.fitted.values[, 'mean']
phi3_hat_vec = exp(z3_post_mean)
Phi_hat_tbl = tibble(fips = inla_tbl$fips,
county_name = inla_tbl$county_name,
phi1 = inla_tbl$phi1,
phi1_hat = phi1_hat_vec,
phi2 = inla_tbl$phi2,
phi2_hat = phi2_hat_vec,
phi3 = inla_tbl$phi3,
phi3_hat = phi3_hat_vec)
Phi_hat_tbl# A tibble: 3,108 × 8
fips county_name phi1 phi1_hat phi2 phi2_hat phi3 phi3_hat
<chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 01001 Autauga County, AL 44.1 46.7 -3.00 -3.68 0.418 0.486
2 01003 Baldwin County, AL 51.0 49.7 -2.98 -3.79 0.449 0.517
3 01005 Barbour County, AL 45.8 49.4 -3.20 -3.32 0.443 0.483
4 01007 Bibb County, AL 34.7 39.8 -3.00 -3.08 0.467 0.479
5 01009 Blount County, AL 32.1 36.3 -3.04 -3.25 0.437 0.472
6 01011 Bullock County, AL 53.3 60.3 -2.90 -3.99 0.422 0.508
7 01013 Butler County, AL 39.1 45.2 -3.02 -3.22 0.477 0.494
8 01015 Calhoun County, AL 47.5 47.0 -3.05 -3.01 0.443 0.451
9 01017 Chambers County, AL 31.0 42.6 -2.90 -3.53 0.482 0.540
10 01019 Cherokee County, AL 30.6 35.6 -2.76 -3.29 0.479 0.530
# … with 3,098 more rows
5.2 Differences between INLA Estimated Phis and those from Logistic Growth Curve Models
phi_spatial_vs_logistic_growth =
rbind(
forecast::accuracy(object = Phi_hat_tbl$phi1_hat, x = Phi_hat_tbl$phi1),
forecast::accuracy(object = Phi_hat_tbl$phi2_hat, x = Phi_hat_tbl$phi2),
forecast::accuracy(object = Phi_hat_tbl$phi3_hat, x = Phi_hat_tbl$phi3)
)
row.names(phi_spatial_vs_logistic_growth) = c('phi1', 'phi2', 'phi3')
round(phi_spatial_vs_logistic_growth, digits = 2) %>%
knitr::kable()| ME | RMSE | MAE | MPE | MAPE | |
|---|---|---|---|---|---|
| phi1 | -0.11 | 4.01 | 3.02 | -1.45 | 6.24 |
| phi2 | 0.00 | 3.10 | 0.76 | -8.92 | 16.47 |
| phi3 | 0.03 | 0.21 | 0.07 | -0.99 | 10.07 |
5.3 Impact on Predictive Accuracy for the 2386 Counties
5.3.1 MAE By County
# reloading the vaccines data to include the dates
vaccines_w_dates =
read_rds('data/vaccines.rds') %>%
mutate(county_name = paste(recip_county, recip_state, sep = ', ') ) %>%
dplyr::select(-c( recip_county, recip_state)) %>%
relocate(county_name, .after = 'fips')
# Computing the expected number of observations (i.e., months of data) per county
months_from_start =
seq.Date(from = min(vaccines_w_dates$date),
to = max(vaccines_w_dates$date),
by = 'month') %>%
length()
# creating a vector of months from start, which will be our dependent variable representing time
months_from_start = 1:months_from_start
# custom function for logistic growth curve
lgc_fit = function(p1, p2, p3, time = months_from_start){
p1/(1+exp(-(p2+p3*time)))
}
# computing the predictive accuracy with respect to two baselines
## actuals (i.e., CDC reported data) and 'lgc_fitted'
pred_acc_per_county =
# actuals
vaccines_w_dates %>% dplyr::select(fips, county_name, series_complete_pop_pct) %>%
right_join(
Phi_hat_tbl %>% drop_na() %>% dplyr::select(-county_name),
by = 'fips') %>%
group_by(fips) %>%
nest(actuals = series_complete_pop_pct) %>%
mutate(
actuals = map(.x = actuals, .f = pull),
lgc_fitted = pmap(list(phi1, phi2, phi3), .f = lgc_fit),
spatial_fitted = pmap(list(phi1_hat, phi2_hat, phi3_hat), .f = lgc_fit),
metrics_actuals = map2(.x = spatial_fitted, .y = actuals, .f = forecast::accuracy),
metrics_lgc = map2(.x = spatial_fitted, .y = lgc_fitted, .f = forecast::accuracy),
mae_actuals = map_dbl(.x = metrics_actuals, .f = extract2, c(3)),
mae_lgc = map_dbl(.x = metrics_lgc, .f = extract2, c(3))
)
# saving the data
write_rds(pred_acc_per_county, file = 'data/pred_acc_per_county.rds')
# printing the table
### custom_function for spk_line
spk_line_exp_growth = function(phi1, phi2, phi3, time = months_from_start){
lgc_fitted = phi1/(1+exp(-(phi2+phi3*time))) # fitted values
# adding spark lines based on the sparkline package
spk_chr(lgc_fitted, type = 'line')
}
### sparkline to be added to the table
spark_lines = Phi_hat_tbl %>%
group_by(fips) %>%
transmute(
lgc_curve = pmap_chr(.l = list(phi1, phi2, phi3), .f = spk_line_exp_growth),
spatial_curve = pmap_chr(.l = list(phi1_hat, phi2_hat, phi3_hat), .f = spk_line_exp_growth)
)
# Printing the actual table
pred_acc_per_county %>%
ungroup() %>%
dplyr::select(fips, county_name, mae_actuals, mae_lgc) %>%
left_join(y = spark_lines %>% ungroup(), by = 'fips') %>%
dplyr::select(-fips) %>%
DT::datatable(rownames = FALSE, escape = FALSE,
options = list(pageLength = 10,
# adding the htmlwidget to the code
fnDrawCallback = htmlwidgets::JS(
'
function(){
HTMLWidgets.staticRender();
}
'))) %>%
spk_add_deps() %>%
DT::formatRound(columns = c('mae_actuals', 'mae_lgc'), digits = 2)Based on the Bayesian models, the average MAEs are: 4.23 and 3.59 when compared to the CDC reported data and the smoothed/fitted logistic growth curves, respectively.
5.3.2 MAE By State
5.3.2.1 Table
pred_acc_per_county %>%
ungroup() %>%
mutate(state = str_sub(county_name, start = -2)) %>%
group_by(state) %>%
summarise(q1_mae_actuals = quantile(mae_actuals, probs = 0.25),
median_mae_actuals = median(mae_actuals),
q3_mae_actuals = quantile(mae_actuals, probs = 0.75),
q1_mae_lgc = quantile(mae_lgc, probs = 0.25),
median_mae_lgc = median(mae_lgc),
q3_mae_lgc = quantile(mae_lgc, probs = 0.75)) -> pred_acc_per_state
# saving the data
write_rds(pred_acc_per_state, file = 'data/pred_acc_per_state.rds')
pred_acc_per_state %>%
mutate(across(q1_mae_actuals:q3_mae_lgc, round, digits =2)) %>%
DT::datatable(rownames = FALSE, escape = FALSE,
options = list(pageLength = 10))5.3.2.2 Figure
# creating a static visual
states_sf = left_join(
x = states_sf,
y = pred_acc_per_state %>% dplyr::select(state, median_mae_actuals ),
by = c('state_abbv' = 'state'))
tm_shape(states_sf) +
tm_borders(col = "black", lwd = 0.5) +
tm_polygons('median_mae_actuals', title = 'Median MAE', palette = "YlOrRd")
6 Appendix
In this appendix, we print all the packages used in our analysis and their versions to assist with reproducing our results/analysis.
pander(sessionInfo(), compact = TRUE) # printing the session infoR version 4.2.1 (2022-06-23 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
locale: LC_COLLATE=English_United States.utf8, LC_CTYPE=English_United States.utf8, LC_MONETARY=English_United States.utf8, LC_NUMERIC=C and LC_TIME=English_United States.utf8
attached base packages: parallel, stats, graphics, grDevices, utils, datasets, methods and base
other attached packages: MASS(v.7.3-58.1), av(v.0.8.1), gifski(v.1.6.6-1), tmap(v.3.3-3), tigris(v.1.6.1), leaflet(v.2.1.1), glue(v.1.6.2), plotly(v.4.10.0), scales(v.1.2.1), sparkline(v.2.0), DT(v.0.24), knitr(v.1.40), pander(v.0.6.5), rsvg(v.2.3.1), fontawesome(v.0.3.0), MCMCglmm(v.2.34), ape(v.5.6-2), coda(v.0.19-4), spdep(v.1.2-5), sf(v.1.0-8), spData(v.2.0.1), forecast(v.8.17.0), skimr(v.2.1.4), hms(v.1.1.2), lubridate(v.1.8.0), janitor(v.2.1.0), magrittr(v.2.0.3), forcats(v.0.5.2), stringr(v.1.4.1), dplyr(v.1.0.9), purrr(v.0.3.4), readr(v.2.1.2), tidyr(v.1.2.0), tibble(v.3.1.8), tidyverse(v.1.3.2), INLAutils(v.0.0.6), INLA(v.22.05.07), sp(v.1.5-0), foreach(v.1.5.2), Matrix(v.1.4-1), albersusa(v.0.4.1), urbnmapr(v.0.0.0.9002), devtools(v.2.4.4), usethis(v.2.1.6), pacman(v.0.5.1), RColorBrewer(v.1.1-3) and ggplot2(v.3.3.6)
loaded via a namespace (and not attached): utf8(v.1.2.2), tidyselect(v.1.1.2), htmlwidgets(v.1.5.4), grid(v.4.2.1), maptools(v.1.1-4), munsell(v.0.5.0), codetools(v.0.2-18), units(v.0.8-0), miniUI(v.0.1.1.1), withr(v.2.5.0), colorspace(v.2.0-3), highr(v.0.9), uuid(v.1.1-0), rstudioapi(v.0.14), wk(v.0.6.0), TTR(v.0.24.3), labeling(v.0.4.2), repr(v.1.1.4), farver(v.2.1.1), vctrs(v.0.4.1), generics(v.0.1.3), xfun(v.0.32), R6(v.2.5.1), cachem(v.1.0.6), assertthat(v.0.2.1), promises(v.1.2.0.1), nnet(v.7.3-17), googlesheets4(v.1.0.1), rgeos(v.0.5-9), gtable(v.0.3.0), lwgeom(v.0.2-8), processx(v.3.7.0), MatrixModels(v.0.5-0), timeDate(v.4021.104), rlang(v.1.0.4), splines(v.4.2.1), rgdal(v.1.5-32), lazyeval(v.0.2.2), gargle(v.1.2.0), dichromat(v.2.0-0.1), broom(v.1.0.0), s2(v.1.1.0), yaml(v.2.3.5), reshape2(v.1.4.4), abind(v.1.4-5), modelr(v.0.1.9), crosstalk(v.1.2.0), backports(v.1.4.1), httpuv(v.1.6.5), quantmod(v.0.4.20), tensorA(v.0.36.2), tools(v.4.2.1), cubature(v.2.0.4.5), ellipsis(v.0.3.2), jquerylib(v.0.1.4), raster(v.3.5-29), proxy(v.0.4-27), sessioninfo(v.1.2.2), Rcpp(v.1.0.9), plyr(v.1.8.7), base64enc(v.0.1-3), classInt(v.0.4-7), ps(v.1.7.1), prettyunits(v.1.1.1), deldir(v.1.0-6), fracdiff(v.1.5-1), cowplot(v.1.1.1), urlchecker(v.1.0.1), tmaptools(v.3.1-1), zoo(v.1.8-10), haven(v.2.5.1), leafem(v.0.2.0), fs(v.1.5.2), data.table(v.1.14.2), lmtest(v.0.9-40), reprex(v.2.0.2), googledrive(v.2.0.0), pkgload(v.1.3.0), mime(v.0.12), evaluate(v.0.16), xtable(v.1.8-4), XML(v.3.99-0.10), readxl(v.1.4.1), compiler(v.4.2.1), KernSmooth(v.2.23-20), crayon(v.1.5.1), htmltools(v.0.5.3), corpcor(v.1.6.10), later(v.1.3.0), tzdb(v.0.3.0), DBI(v.1.1.3), dbplyr(v.2.2.1), rappdirs(v.0.3.3), boot(v.1.3-28), cli(v.3.3.0), quadprog(v.1.5-8), pkgconfig(v.2.0.3), ggregplot(v.0.1.0), foreign(v.0.8-82), terra(v.1.6-7), xml2(v.1.3.3), bslib(v.0.4.0), rvest(v.1.0.3), snakecase(v.0.11.0), callr(v.3.7.2), digest(v.0.6.29), rmarkdown(v.2.16), cellranger(v.1.1.0), leafsync(v.0.1.0), curl(v.4.3.2), shiny(v.1.7.2), urca(v.1.3-0), lifecycle(v.1.0.1), nlme(v.3.1-159), jsonlite(v.1.8.0), tseries(v.0.10-51), viridisLite(v.0.4.1), fansi(v.1.0.3), pillar(v.1.8.1), lattice(v.0.20-45), fastmap(v.1.1.0), httr(v.1.4.4), pkgbuild(v.1.3.1), xts(v.0.12.1), remotes(v.2.4.2), leaflet.providers(v.1.9.0), png(v.0.1-7), iterators(v.1.0.14), sass(v.0.4.2), class(v.7.3-20), stringi(v.1.7.8), profvis(v.0.3.7), stars(v.0.5-6), memoise(v.2.0.1) and e1071(v.1.7-11)